In [1]:
# Import libraries and load the dataset
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Resolve the CSV location from configuration rather than baking one machine's
# absolute path into the read call. Set the KIMCHI_DATA_DIR environment variable
# to point at the directory holding the file; when it is unset, the original
# directory is used, so existing runs behave exactly as before.
DATA_DIR = Path(os.environ.get("KIMCHI_DATA_DIR", "/Users/Desktop/beathika treatment"))
DATA_FILE = DATA_DIR / "Kimchi_dataset.csv"

# Load the raw table; a missing file surfaces as FileNotFoundError from pandas.
dataset = pd.read_csv(DATA_FILE)

# Last expression of the cell: display the column index as a quick schema check.
dataset.columns
Out[1]:
Index(['Date', 'Price', 'Total Volume', 'Total Boxes', 'Small Boxes',
       'Large Boxes', 'XLarge Boxes', 'Region'],
      dtype='object')
In [2]:
# Bare expression as the cell's last line so the notebook renders the DataFrame.
# The recorded output is truncated to the first and last five of 648 rows.
dataset
Out[2]:
Date Price Total Volume Total Boxes Small Boxes Large Boxes XLarge Boxes Region
0 3/25/2018 1.71 2321.82 2006.46 1996.46 10.00 0.0 Seoul
1 3/18/2018 1.66 3154.45 2580.60 2577.27 3.33 0.0 Seoul
2 3/11/2018 1.68 2570.52 2209.29 2209.29 0.00 0.0 Seoul
3 3/4/2018 1.48 3851.30 3242.98 3239.65 3.33 0.0 Seoul
4 2/25/2018 1.56 5356.63 4007.48 4007.48 0.00 0.0 Seoul
... ... ... ... ... ... ... ... ...
643 2/4/2018 1.63 17074.83 13498.67 13066.82 431.85 0.0 Boryeong
644 1/28/2018 1.71 13888.04 9264.84 8940.04 324.80 0.0 Boryeong
645 1/21/2018 1.87 13766.76 9394.11 9351.80 42.31 0.0 Boryeong
646 1/14/2018 1.93 16205.22 10969.54 10919.54 50.00 0.0 Boryeong
647 1/7/2018 1.62 17489.58 12014.15 11988.14 26.01 0.0 Boryeong

648 rows × 8 columns

In [3]:
# NOTE(review): this import lives mid-notebook rather than in the first cell with
# the other imports; consider consolidating it there.
# NOTE(review): pandas_profiling is believed to have been renamed to
# ydata-profiling upstream - confirm which package this environment provides.
from pandas_profiling import ProfileReport

# Construct a ProfileReport from the loaded `dataset` frame and keep it in `prof`.
prof = ProfileReport(dataset)
# Bare expression so the notebook displays the report object as the cell output.
prof
Out[3]:

In [ ]: